from bs4 import BeautifulSoup
# Open the HTML file and parse it with the lxml parser.
# The context manager guarantees the file handle is closed once parsing is
# done; BeautifulSoup consumes the file contents inside its constructor, so
# the resulting tree stays usable after the file is closed.
with open('1.html') as html:
    soup = BeautifulSoup(html, 'lxml')


# Get the <title> tag.
title = soup.title
print(title)

# Get the first <b> tag inside <body>.
b = soup.body.b
print(b)

# Get the first <a> tag in the document (kept for later use; not printed).
a = soup.find('a')

# Get every <a> tag.
a_all1 = soup.find_all('a')
print(a_all1)

# Keep only the <a> tags whose id is "link1".
a_all2 = soup.find_all('a', id='link1')
print(a_all2)

# Keep only the <a> tags whose class is "website".
# (`class_` is used because `class` is a reserved word in Python.)
a_all3 = soup.find_all('a', class_='website')
print(a_all3)
